Searching 487 files for "QLearner"

/Users/dabel/Research/simple_rl/MANIFEST:
   20  simple_rl/agents/AgentClass.py
   21  simple_rl/agents/FixedPolicyAgentClass.py
   22: simple_rl/agents/QLearnerAgentClass.py
   23  simple_rl/agents/RMaxAgentClass.py
   24  simple_rl/agents/RandomAgentClass.py
   ..
   27  simple_rl/agents/bandits/__init__.py
   28  simple_rl/agents/func_approx/GradientBoostingAgentClass.py
   29: simple_rl/agents/func_approx/LinearQLearnerAgentClass.py
   30  simple_rl/agents/func_approx/LinearSarsaAgentClass.py
   31  simple_rl/agents/func_approx/__init__.py

/Users/dabel/Research/simple_rl/README.md:
   29  	from simple_rl.run_experiments import run_agents_on_mdp
   30  	from simple_rl.tasks import GridWorldMDP
   31: 	from simple_rl.agents import QLearnerAgent
   32  
   33  	# Run Experiment
   34  	mdp = GridWorldMDP()
   35: 	agent = QLearnerAgent(mdp.get_actions())
   36  	run_agents_on_mdp([agent], mdp)
   37  

/Users/dabel/Research/simple_rl/build/lib/simple_rl/__init__.py:
    3  	agents/
    4  		AgentClass.py
    5: 		QLearnerAgentClass.py
    6  		RandomAgentClass.py
    7  		RMaxAgentClass.py

/Users/dabel/Research/simple_rl/build/lib/simple_rl/run_experiments.py:
   29  from simple_rl.experiments import Experiment
   30  from simple_rl.mdp import MarkovGameMDP
   31: from simple_rl.agents import QLearnerAgent, FixedPolicyAgent
   32  
   33  
   ..
  456  
  457      # Setup agents.
  458:     from simple_rl.agents import RandomAgent, QLearnerAgent
  459      
  460      random_agent = RandomAgent(actions)
  461:     qlearner_agent = QLearnerAgent(actions, gamma=gamma, explore="uniform")
  462:     agents = [qlearner_agent, random_agent]
  463  
  464      # Run Agents.
  465      if isinstance(mdp, MarkovGameMDP):
  466          # Markov Game.
  467:         agents = {qlearner_agent.name: qlearner_agent, random_agent.name:random_agent}
  468          play_markov_game(agents, mdp, instances=100, episodes=1, steps=500)
  469      else:

/Users/dabel/Research/simple_rl/build/lib/simple_rl/agents/__init__.py:
    3  
    4  	AgentClass: Contains the basic skeleton of an RL Agent.
    5: 	QLearnerAgentClass: QLearner.
    6: 	LinearQLearnerAgentClass: Q Learner with a Linear Approximator.
    7  	RandomAgentClass: Random actor.
    8  	RMaxAgentClass: RMax.
    .
   13  from AgentClass import Agent
   14  from FixedPolicyAgentClass import FixedPolicyAgent
   15: from QLearnerAgentClass import QLearnerAgent
   16  from DoubleQAgentClass import DoubleQAgent
   17  from DelayedQAgentClass import DelayedQAgent
   ..
   19  from RMaxAgentClass import RMaxAgent
   20  
   21: from func_approx.LinearQLearnerAgentClass import LinearQLearnerAgent
   22  from func_approx.LinearSarsaAgentClass import LinearSarsaAgent
   23  

/Users/dabel/Research/simple_rl/build/lib/simple_rl/agents/DoubleQAgentClass.py:
   13  
   14  # Other imports
   15: from simple_rl.agents.QLearnerAgentClass import QLearnerAgent
   16  from simple_rl.agents.AgentClass import Agent
   17  
   18: class DoubleQAgent(QLearnerAgent):
   19      ''' Class for an agent using Double Q Learning. '''
   20  
   ..
   29              explore (str): One of {softmax, uniform}. Denotes explore policy.
   30          '''
   31:         QLearnerAgent.__init__(self, actions, name=name, alpha=alpha, gamma=gamma, epsilon=epsilon, explore=explore, anneal=anneal)
   32  
   33          # Make two q functions.

/Users/dabel/Research/simple_rl/build/lib/simple_rl/agents/GradientBoostingAgentClass.py:
   18  
   19  # simple_rl classes.
   20: from QLearnerAgentClass import QLearnerAgent
   21  from AgentClass import Agent
   22  
   23: class GradientBoostingAgent(QLearnerAgent):
   24      '''
   25:     QLearnerAgent that uses gradient boosting with additive regression trees to approximate the Q Function.
   26      '''
   27  
   28      def __init__(self, actions, name="grad_boost", gamma=0.95, explore="softmax", markov_window=20):
   29          name += "-m" if markov_window > 0 else ""
   30:         QLearnerAgent.__init__(self, actions=actions, name=name, gamma=gamma, explore=explore)
   31          self.weak_learners = []
   32          self.model = []

/Users/dabel/Research/simple_rl/build/lib/simple_rl/agents/LinearApproxQLearnerAgentClass.py:
    1  '''
    2: LinearApproxQLearnerAgentClass.py
    3  
    4  Contains implementation for a Q Learner with a Linear Function Approximator.
    .
    6  
    7  # Local classes
    8: from QLearnerAgentClass import QLearnerAgent
    9  
   10  # Python imports.
   ..
   12  import math
   13  
   14: class LinearApproxQLearnerAgent(QLearnerAgent):
   15      '''
   16:     QLearnerAgent with a linear function approximator for the Q Function.
   17      '''
   18  
   19      def __init__(self, actions, name="lin_q_approx", alpha=0.05, gamma=0.95, epsilon=0.01, explore="uniform", rbf=False):
   20          self.name = "linear-" + explore
   21:         QLearnerAgent.__init__(self, actions=list(actions), name=name, alpha=alpha, gamma=gamma, epsilon=epsilon, explore=explore)
   22          self.num_features = 0
   23          self.rbf = rbf

/Users/dabel/Research/simple_rl/build/lib/simple_rl/agents/QLearnerAgentClass.py:
   10  from simple_rl.agents.AgentClass import Agent
   11  
   12: class QLearnerAgent(Agent):
   13      ''' Implementation for a Q Learning Agent '''
   14  

/Users/dabel/Research/simple_rl/build/lib/simple_rl/agents/func_approx/GradientBoostingAgentClass.py:
   22  
   23  # simple_rl classes.
   24: from ..QLearnerAgentClass import QLearnerAgent
   25  
   26: class GradientBoostingAgent(QLearnerAgent):
   27      '''
   28:     QLearnerAgent that uses gradient boosting with additive regression trees to approximate the Q Function.
   29      '''
   30  
   31      def __init__(self, actions, name="grad_boost", gamma=0.99, explore="softmax", markov_window=20, update_window=500):
   32          name += "-m" if markov_window > 0 else ""
   33:         QLearnerAgent.__init__(self, actions=actions, name=name, gamma=gamma, explore=explore)
   34          self.weak_learners = []
   35          self.model = []

/Users/dabel/Research/simple_rl/build/lib/simple_rl/agents/func_approx/LinearQLearnerAgentClass.py:
    1  '''
    2: LinearQLearnerAgentClass.py
    3  
    4  Contains implementation for a Q Learner with a Linear Function Approximator.
    .
   10  
   11  # Other imports.
   12: from simple_rl.agents import Agent, QLearnerAgent
   13  
   14: class LinearQLearnerAgent(QLearnerAgent):
   15      '''
   16:     QLearnerAgent with a linear function approximator for the Q Function.
   17      '''
   18  
   19      def __init__(self, actions, num_features, rand_init=True, name="Linear-Q", alpha=0.2, gamma=0.99, epsilon=0.2, explore="uniform", rbf=False, anneal=True):
   20          name = name + "-rbf" if rbf else name
   21:         QLearnerAgent.__init__(self, actions=list(actions), name=name, alpha=alpha, gamma=gamma, epsilon=epsilon, explore=explore, anneal=anneal)
   22          self.num_features = num_features
   23          # Add a basis feature.
   ..
  125      def reset(self):
  126          self.weights = np.zeros(self.num_features*len(self.actions))
  127:         QLearnerAgent.reset(self)
  128  
  129  

/Users/dabel/Research/simple_rl/build/lib/simple_rl/agents/func_approx/LinearSarsaAgentClass.py:
   10  
   11  # Other imports.
   12: from simple_rl.agents.func_approx.LinearQLearnerAgentClass import LinearQLearnerAgent
   13  
   14: class LinearSarsaAgent(LinearQLearnerAgent):
   15      '''
   16      Sarsa Agent with a linear function approximator for the Q Function.
   ..
   19      def __init__(self, actions, num_features, rand_init=False, name="Linear-SARSA", alpha=0.05, gamma=0.99, epsilon=0.01, explore="uniform", rbf=False, anneal=True):
   20          name = name + "-rbf" if (name == "sarsa-linear" and rbf) else name
   21:         LinearQLearnerAgent.__init__(self, actions=list(actions), rand_init=rand_init, num_features=num_features, name=name, alpha=alpha, gamma=gamma, epsilon=epsilon, explore=explore, anneal=anneal)
   22  
   23      def act(self, state, reward):

/Users/dabel/Research/simple_rl/build/scripts-2.7/run_experiments.py:
   29  from simple_rl.experiments import Experiment
   30  from simple_rl.mdp import MarkovGameMDP
   31: from simple_rl.agents import QLearnerAgent, FixedPolicyAgent
   32  
   33  
   ..
  456  
  457      # Setup agents.
  458:     from simple_rl.agents import RandomAgent, QLearnerAgent
  459      
  460      random_agent = RandomAgent(actions)
  461:     qlearner_agent = QLearnerAgent(actions, gamma=gamma, explore="uniform")
  462:     agents = [qlearner_agent, random_agent]
  463  
  464      # Run Agents.
  465      if isinstance(mdp, MarkovGameMDP):
  466          # Markov Game.
  467:         agents = {qlearner_agent.name: qlearner_agent, random_agent.name:random_agent}
  468          play_markov_game(agents, mdp, instances=100, episodes=1, steps=500)
  469      else:

/Users/dabel/Research/simple_rl/dist/simple_rl-0.77-py2.py3-none-any.whl:
    <binary>

/Users/dabel/Research/simple_rl/examples/abstraction_example.py:
    6  # Other imports.
    7  import examples.srl_example_setup
    8: from simple_rl.agents import QLearnerAgent, RandomAgent
    9  from simple_rl.tasks import GridWorldMDP
   10  from simple_rl.run_experiments import run_agents_on_mdp
   ..
   14      # Setup MDP, Agents.
   15      mdp = GridWorldMDP(width=10, height=10, init_loc=(1, 1), goal_locs=[(10, 10)])
   16:     ql_agent = QLearnerAgent(actions=mdp.get_actions())
   17      rand_agent = RandomAgent(actions=mdp.get_actions())
   18:     abstr_identity_agent = AbstractionWrapper(QLearnerAgent, agent_params={"epsilon":0.9}, actions=mdp.get_actions())
   19  
   20      # Run experiment and make plot.

/Users/dabel/Research/simple_rl/examples/bandit_example.py:
    6  # Other imports.
    7  import examples.srl_example_setup
    8: from simple_rl.agents import LinUCBAgent, QLearnerAgent, RandomAgent
    9  from simple_rl.tasks import BanditMDP
   10  from simple_rl.run_experiments import run_agents_on_mdp
   ..
   15  
   16      lin_agent = LinUCBAgent(actions=mdp.get_actions())
   17:     ql_agent = QLearnerAgent(actions=mdp.get_actions())
   18      rand_agent = RandomAgent(actions=mdp.get_actions())
   19  

/Users/dabel/Research/simple_rl/examples/blank_example.py:
    6  # Other imports.
    7  import examples.srl_example_setup
    8: from simple_rl.agents import QLearnerAgent, RandomAgent, DoubleQAgent
    9  from simple_rl.tasks import FourRoomMDP, ComboLockMDP
   10  from simple_rl.run_experiments import run_agents_on_mdp
   ..
   17  
   18      dq_agent = DoubleQAgent(actions=mdp.get_actions())
   19:     ql_agent = QLearnerAgent(actions=mdp.get_actions())
   20      rand_agent = RandomAgent(actions=mdp.get_actions())
   21  

/Users/dabel/Research/simple_rl/examples/examples_overview.ipynb:
   35      "sys.path.insert(0, parent_dir)\n",
   36      "\n",
   37:     "from simple_rl.agents import QLearnerAgent, RandomAgent\n",
   38      "from simple_rl.tasks import GridWorldMDP\n",
   39      "from simple_rl.run_experiments import run_agents_on_mdp"
   ..
   60      "\n",
   61      "# Setup Agents.\n",
   62:     "ql_agent = QLearnerAgent(actions=mdp.get_actions()) \n",
   63      "rand_agent = RandomAgent(actions=mdp.get_actions())"
   64     ]
   ..
   84        "\tgridworld_h-6_w-6\n",
   85        "(Agents)\n",
   86:       "\tqlearner\n",
   87        "\trandom\n",
   88        "(Params)\n",
   ..
   91        "\tsteps : 40\n",
   92        "\n",
   93:       "qlearner is learning.\n",
   94        "  Instance 1 of 5.\n",
   95        "  Instance 2 of 5.\n",
   ..
  107        "\n",
  108        "--- TIMES ---\n",
  109:       "qlearner agent took 0.89 seconds.\n",
  110        "random agent took 0.35 seconds.\n",
  111        "-------------\n",
  ...
  152        "(Agents)\n",
  153        "\trmax-h3\n",
  154:       "\tqlearner\n",
  155        "\trandom\n",
  156        "(Params)\n",
  ...
  166        "  Instance 5 of 5.\n",
  167        "\n",
  168:       "qlearner is learning.\n",
  169        "  Instance 1 of 5.\n",
  170        "  Instance 2 of 5.\n",
  ...
  184        "random agent took 0.27 seconds.\n",
  185        "rmax-h3 agent took 70.88 seconds.\n",
  186:       "qlearner agent took 0.75 seconds.\n",
  187        "-------------\n",
  188        "\n"
  ...
  380      "from simple_rl.tasks import TaxiOOMDP\n",
  381      "from simple_rl.run_experiments import run_agents_on_mdp\n",
  382:     "from simple_rl.agents import QLearnerAgent, RandomAgent\n",
  383      "\n",
  384      "# Taxi initial state attributes..\n",
  ...
  388      "\n",
  389      "# Make agents.\n",
  390:     "ql_agent = QLearnerAgent(actions=taxi_mdp.get_actions()) \n",
  391      "rand_agent = RandomAgent(actions=taxi_mdp.get_actions())"
  392     ]
  ...
  412        "\ttaxi_h-4_w-4\n",
  413        "(Agents)\n",
  414:       "\tqlearner\n",
  415        "\trandom\n",
  416        "(Params)\n",
  ...
  419        "\tsteps : 150\n",
  420        "\n",
  421:       "qlearner is learning.\n",
  422        "  Instance 1 of 5.\n",
  423        "  Instance 2 of 5.\n",
  ...
  436        "--- TIMES ---\n",
  437        "random agent took 7.68 seconds.\n",
  438:       "qlearner agent took 15.32 seconds.\n",
  439        "-------------\n",
  440        "\n"
  ...
  493        "(Agents)\n",
  494        "\tfixed-policy\n",
  495:       "\tqlearner\n",
  496        "(Params)\n",
  497        "\tinstances : 10\n",
  ...
  523     "source": [
  524      "from simple_rl.run_experiments import play_markov_game\n",
  525:     "from simple_rl.agents import QLearnerAgent, FixedPolicyAgent\n",
  526      "from simple_rl.tasks import RockPaperScissorsMDP\n",
  527      "\n",
  ...
  530      "# Setup MDP, Agents.\n",
  531      "markov_game = RockPaperScissorsMDP()\n",
  532:     "ql_agent = QLearnerAgent(actions=markov_game.get_actions(), epsilon=0.2) \n",
  533      "fixed_action = random.choice(markov_game.get_actions())\n",
  534      "fixed_agent = FixedPolicyAgent(policy=lambda s:fixed_action)\n",
  ...
  604     "source": [
  605      "from simple_rl.tasks import GymMDP\n",
  606:     "from simple_rl.agents import LinearQLearnerAgent, RandomAgent\n",
  607      "from simple_rl.run_experiments import run_agents_on_mdp\n",
  608      "\n",
  ...
  612      "\n",
  613      "# Setup agents and run.\n",
  614:     "lin_agent = LinearQLearnerAgent(gym_mdp.get_actions(), num_features=num_feats, alpha=0.2, epsilon=0.4, rbf=True)\n",
  615      "\n",
  616      "run_agents_on_mdp([lin_agent], gym_mdp, instances=3, episodes=1, steps=50)"

/Users/dabel/Research/simple_rl/examples/goal_based_options_example.py:
    7  import examples.srl_example_setup
    8  from simple_rl.utils import make_mdp
    9: from simple_rl.agents import QLearnerAgent, RandomAgent
   10  from simple_rl.run_experiments import run_agents_multi_task
   11  from simple_rl.abstraction import AbstractionWrapper, aa_helpers, ActionAbstraction
   ..
   14      # Setup MDP, Agents.
   15      mdp_distr = make_mdp.make_mdp_distr(mdp_class="four_room")
   16:     ql_agent = QLearnerAgent(actions=mdp_distr.get_actions())
   17      rand_agent = RandomAgent(actions=mdp_distr.get_actions())
   18  
   ..
   20      goal_based_options = aa_helpers.make_goal_based_options(mdp_distr)
   21      goal_based_aa = ActionAbstraction(prim_actions=mdp_distr.get_actions(), options=goal_based_options)
   22:     option_agent = AbstractionWrapper(QLearnerAgent, actions=mdp_distr.get_actions(), action_abstr=goal_based_aa)
   23  
   24      # Run experiment and make plot.

/Users/dabel/Research/simple_rl/examples/gym_example.py:
    7  # Other imports.
    8  import examples.srl_example_setup
    9: from simple_rl.agents import LinearQLearnerAgent, RandomAgent
   10  from simple_rl.tasks import GymMDP
   11  from simple_rl.run_experiments import run_agents_on_mdp
   ..
   17  
   18      # Setup agents and run.
   19:     lin_agent = LinearQLearnerAgent(gym_mdp.get_actions(), num_features=num_feats, rbf=True, alpha=0.1, epsilon=0.1, anneal=True)
   20      rand_agent = RandomAgent(gym_mdp.get_actions())
   21      run_agents_on_mdp([lin_agent, rand_agent], gym_mdp, instances=50, episodes=50, steps=100, open_plot=open_plot)

/Users/dabel/Research/simple_rl/examples/markov_game_example.py:
    7  # Other imports.
    8  import examples.srl_example_setup
    9: from simple_rl.agents import QLearnerAgent, FixedPolicyAgent
   10  from simple_rl.tasks import RockPaperScissorsMDP
   11  from simple_rl.run_experiments import play_markov_game 
   ..
   14      # Setup MDP, Agents.
   15      markov_game = RockPaperScissorsMDP()
   16:     ql_agent = QLearnerAgent(actions=markov_game.get_actions())
   17      fixed_action = random.choice(markov_game.get_actions())
   18      fixed_agent = FixedPolicyAgent(policy=lambda s: fixed_action)

/Users/dabel/Research/simple_rl/examples/multitask_example.py:
    8  from simple_rl.mdp import MDPDistribution
    9  from simple_rl.tasks import GridWorldMDP, RandomMDP, ChainMDP, TaxiOOMDP, FourRoomMDP
   10: from simple_rl.agents import QLearnerAgent, RandomAgent, RMaxAgent
   11  from simple_rl.run_experiments import run_agents_multi_task
   12  from simple_rl.utils import make_mdp
   ..
   15      # Make MDP distribution, agents.
   16      mdp_distr = make_mdp.make_mdp_distr(mdp_class="four_room")
   17:     ql_agent = QLearnerAgent(actions=mdp_distr.get_actions())
   18      rand_agent = RandomAgent(actions=mdp_distr.get_actions())
   19  

/Users/dabel/Research/simple_rl/examples/new_mdp_example.py:
    7  # Other imports.
    8  import examples.srl_example_setup
    9: from simple_rl.agents import QLearnerAgent, RandomAgent
   10  from simple_rl.tasks import GridWorldMDP, GridWorldState
   11  from simple_rl.run_experiments import run_agents_on_mdp 
   ..
   83      # Setup MDP, Agents.
   84      mdp = ColoredGridWorldMDP(state_colors)
   85:     ql_agent = QLearnerAgent(actions=mdp.get_actions())
   86      rand_agent = RandomAgent(actions=mdp.get_actions())
   87  

/Users/dabel/Research/simple_rl/examples/.ipynb_checkpoints/examples_overview-checkpoint.ipynb:
   84        "\tgridworld_h-6_w-6\n",
   85        "(Agents)\n",
   86:       "\tqlearner\n",
   87        "\trandom\n",
   88        "(Params)\n",
   ..
   91        "\tsteps : 40\n",
   92        "\n",
   93:       "qlearner is learning.\n",
   94        "  Instance 1 of 5.\n",
   95        "  Instance 2 of 5.\n",
   ..
  107        "\n",
  108        "--- TIMES ---\n",
  109:       "qlearner agent took 1.25 seconds.\n",
  110        "random agent took 0.38 seconds.\n",
  111        "-------------\n",
  ...
  152        "(Agents)\n",
  153        "\trmax-h3\n",
  154:       "\tqlearner\n",
  155        "\trandom\n",
  156        "(Params)\n",
  ...
  166        "  Instance 5 of 5.\n",
  167        "\n",
  168:       "qlearner is learning.\n",
  169        "  Instance 1 of 5.\n",
  170        "  Instance 2 of 5.\n",
  ...
  184        "random agent took 0.27 seconds.\n",
  185        "rmax-h3 agent took 70.88 seconds.\n",
  186:       "qlearner agent took 0.75 seconds.\n",
  187        "-------------\n",
  188        "\n"
  ...
  412        "\ttaxi_h-4_w-4\n",
  413        "(Agents)\n",
  414:       "\tqlearner\n",
  415        "\trandom\n",
  416        "(Params)\n",
  ...
  419        "\tsteps : 150\n",
  420        "\n",
  421:       "qlearner is learning.\n",
  422        "  Instance 1 of 5.\n",
  423        "  Instance 2 of 5.\n",
  ...
  436        "--- TIMES ---\n",
  437        "random agent took 7.68 seconds.\n",
  438:       "qlearner agent took 15.32 seconds.\n",
  439        "-------------\n",
  440        "\n"
  ...
  493        "(Agents)\n",
  494        "\tfixed-policy\n",
  495:       "\tqlearner\n",
  496        "(Params)\n",
  497        "\tinstances : 10\n",

/Users/dabel/Research/simple_rl/examples/results/multitask-fourroom_h-9_w-9/parameters.txt:
    2  	multitask-fourroom_h-9_w-9
    3  (Agents)
    4: 	qlearner,0
    5  	random,1
    6  (Params)

/Users/dabel/Research/simple_rl/results/chainmdp-4/parameters.txt:
    2  	chainmdp-4
    3  (Agents)
    4: 	qlearner
    5  	rmax-h4
    6  (Params)

/Users/dabel/Research/simple_rl/results/chainmdp-5/parameters.txt:
    2  	chainmdp-5
    3  (Agents)
    4: 	qlearner
    5  	rmax-h4
    6  (Params)

/Users/dabel/Research/simple_rl/results/gridworld_h-10_w-10/parameters.txt:
    2  	gridworld_h-10_w-10
    3  (Agents)
    4: 	qlearner
    5  	rmax-h4
    6  (Params)

/Users/dabel/Research/simple_rl/results/gridworld_h-5_w-5/parameters.txt:
    2  	gridworld_h-5_w-5
    3  (Agents)
    4: 	qlearner-softmax
    5  	random
    6  (Params)

/Users/dabel/Research/simple_rl/results/gridworld_h-6_w-6/parameters.txt:
    2  	gridworld_h-6_w-6
    3  (Agents)
    4: 	qlearner-softmax
    5  	random
    6  (Params)

/Users/dabel/Research/simple_rl/results/gridworld_h-8_w-8/parameters.txt:
    2  	gridworld_h-8_w-8
    3  (Agents)
    4: 	qlearner-softmax
    5  	random
    6  (Params)

/Users/dabel/Research/simple_rl/results/in_pap_taxi_h-10_w-10/parameters.txt:
    2  	taxi_h-10_w-10
    3  (Agents)
    4: 	qlearner-softmax
    5: 	qlearner-uniform
    6  	random
    7  (Params)

/Users/dabel/Research/simple_rl/results/pruning-taxi_h-10_w-10/parameters.txt:
    3  (Agents)
    4  	rmax-h2
    5: 	qlearner-softmax
    6: 	qlearner-uniform
    7  	random
    8  (Params)

/Users/dabel/Research/simple_rl/results/taxi_h-10_w-10/parameters.txt:
    2  	taxi_h-10_w-10
    3  (Agents)
    4: 	qlearner-uniform
    5: 	qlearner-softmax
    6  	random
    7  (Params)

/Users/dabel/Research/simple_rl/results/taxi_h-5_w-5/parameters.txt:
    2  	taxi_h-5_w-5
    3  (Agents)
    4: 	qlearner-uniform
    5  	random
    6  (Params)

/Users/dabel/Research/simple_rl/simple_rl/run_experiments.py:
  457      
  458      random_agent = RandomAgent(actions)
  459:     qlearner_agent = QLearningAgent(actions, gamma=gamma, explore="uniform")
  460:     agents = [qlearner_agent, random_agent]
  461  
  462      # Run Agents.
  463      if isinstance(mdp, MarkovGameMDP):
  464          # Markov Game.
  465:         agents = {qlearner_agent.name: qlearner_agent, random_agent.name:random_agent}
  466          play_markov_game(agents, mdp, instances=100, episodes=1, steps=500)
  467      else:

/Users/dabel/Research/simple_rl/simple_rl/agents/__init__.py:
    3  
    4  	AgentClass: Contains the basic skeleton of an RL Agent.
    5: 	QLearningAgentClass: QLearner.
    6  	LinearQLearningAgentClass: Q Learner with a Linear Approximator.
    7  	RandomAgentClass: Random actor.

/Users/dabel/Research/simple_rl/simple_rl/experiments/results/gridworld_h-10_w-10/parameters.txt:
    2  	gridworld_h-10_w-10
    3  (Agents)
    4: 	qlearner-uniform
    5: 	qlearner-softmax
    6  	random
    7  (Params)

/Users/dabel/Research/simple_rl/tests/results/gridworld_h-3_w-2/parameters.txt:
    2  	gridworld_h-3_w-2
    3  (Agents)
    4: 	qlearner,0
    5  	random,1
    6  (Params)

/Users/dabel/Research/simple_rl/tests/results/multitask-fourroom_h-7_w-7/parameters.txt:
    2  	multitask-fourroom_h-7_w-7
    3  (Agents)
    4: 	qlearner,0
    5  	random,1
    6  (Params)

134 matches across 40 files


Searching 487 files for "QLearner"

/Users/dabel/Research/simple_rl/MANIFEST:
   20  simple_rl/agents/AgentClass.py
   21  simple_rl/agents/FixedPolicyAgentClass.py
   22: simple_rl/agents/QLearnerAgentClass.py
   23  simple_rl/agents/RMaxAgentClass.py
   24  simple_rl/agents/RandomAgentClass.py
   ..
   27  simple_rl/agents/bandits/__init__.py
   28  simple_rl/agents/func_approx/GradientBoostingAgentClass.py
   29: simple_rl/agents/func_approx/LinearQLearnerAgentClass.py
   30  simple_rl/agents/func_approx/LinearSarsaAgentClass.py
   31  simple_rl/agents/func_approx/__init__.py

/Users/dabel/Research/simple_rl/README.md:
   33  	# Run Experiment
   34  	mdp = GridWorldMDP()
   35: 	agent = QLearnerAgent(mdp.get_actions())
   36  	run_agents_on_mdp([agent], mdp)
   37  

/Users/dabel/Research/simple_rl/build/lib/simple_rl/__init__.py:
    3  	agents/
    4  		AgentClass.py
    5: 		QLearnerAgentClass.py
    6  		RandomAgentClass.py
    7  		RMaxAgentClass.py

/Users/dabel/Research/simple_rl/build/lib/simple_rl/run_experiments.py:
   29  from simple_rl.experiments import Experiment
   30  from simple_rl.mdp import MarkovGameMDP
   31: from simple_rl.agents import QLearnerAgent, FixedPolicyAgent
   32  
   33  
   ..
  456  
  457      # Setup agents.
  458:     from simple_rl.agents import RandomAgent, QLearnerAgent
  459      
  460      random_agent = RandomAgent(actions)
  461:     qlearner_agent = QLearnerAgent(actions, gamma=gamma, explore="uniform")
  462:     agents = [qlearner_agent, random_agent]
  463  
  464      # Run Agents.
  465      if isinstance(mdp, MarkovGameMDP):
  466          # Markov Game.
  467:         agents = {qlearner_agent.name: qlearner_agent, random_agent.name:random_agent}
  468          play_markov_game(agents, mdp, instances=100, episodes=1, steps=500)
  469      else:

/Users/dabel/Research/simple_rl/build/lib/simple_rl/agents/__init__.py:
    3  
    4  	AgentClass: Contains the basic skeleton of an RL Agent.
    5: 	QLearnerAgentClass: QLearner.
    6: 	LinearQLearnerAgentClass: Q Learner with a Linear Approximator.
    7  	RandomAgentClass: Random actor.
    8  	RMaxAgentClass: RMax.
    .
   13  from AgentClass import Agent
   14  from FixedPolicyAgentClass import FixedPolicyAgent
   15: from QLearnerAgentClass import QLearnerAgent
   16  from DoubleQAgentClass import DoubleQAgent
   17  from DelayedQAgentClass import DelayedQAgent
   ..
   19  from RMaxAgentClass import RMaxAgent
   20  
   21: from func_approx.LinearQLearnerAgentClass import LinearQLearnerAgent
   22  from func_approx.LinearSarsaAgentClass import LinearSarsaAgent
   23  

/Users/dabel/Research/simple_rl/build/lib/simple_rl/agents/DoubleQAgentClass.py:
   13  
   14  # Other imports
   15: from simple_rl.agents.QLearnerAgentClass import QLearnerAgent
   16  from simple_rl.agents.AgentClass import Agent
   17  
   18: class DoubleQAgent(QLearnerAgent):
   19      ''' Class for an agent using Double Q Learning. '''
   20  
   ..
   29              explore (str): One of {softmax, uniform}. Denotes explore policy.
   30          '''
   31:         QLearnerAgent.__init__(self, actions, name=name, alpha=alpha, gamma=gamma, epsilon=epsilon, explore=explore, anneal=anneal)
   32  
   33          # Make two q functions.

/Users/dabel/Research/simple_rl/build/lib/simple_rl/agents/GradientBoostingAgentClass.py:
   18  
   19  # simple_rl classes.
   20: from QLearnerAgentClass import QLearnerAgent
   21  from AgentClass import Agent
   22  
   23: class GradientBoostingAgent(QLearnerAgent):
   24      '''
   25:     QLearnerAgent that uses gradient boosting with additive regression trees to approximate the Q Function.
   26      '''
   27  
   28      def __init__(self, actions, name="grad_boost", gamma=0.95, explore="softmax", markov_window=20):
   29          name += "-m" if markov_window > 0 else ""
   30:         QLearnerAgent.__init__(self, actions=actions, name=name, gamma=gamma, explore=explore)
   31          self.weak_learners = []
   32          self.model = []

/Users/dabel/Research/simple_rl/build/lib/simple_rl/agents/LinearApproxQLearnerAgentClass.py:
    1  '''
    2: LinearApproxQLearnerAgentClass.py
    3  
    4  Contains implementation for a Q Learner with a Linear Function Approximator.
    .
    6  
    7  # Local classes
    8: from QLearnerAgentClass import QLearnerAgent
    9  
   10  # Python imports.
   ..
   12  import math
   13  
   14: class LinearApproxQLearnerAgent(QLearnerAgent):
   15      '''
   16:     QLearnerAgent with a linear function approximator for the Q Function.
   17      '''
   18  
   19      def __init__(self, actions, name="lin_q_approx", alpha=0.05, gamma=0.95, epsilon=0.01, explore="uniform", rbf=False):
   20          self.name = "linear-" + explore
   21:         QLearnerAgent.__init__(self, actions=list(actions), name=name, alpha=alpha, gamma=gamma, epsilon=epsilon, explore=explore)
   22          self.num_features = 0
   23          self.rbf = rbf

/Users/dabel/Research/simple_rl/build/lib/simple_rl/agents/QLearnerAgentClass.py:
   10  from simple_rl.agents.AgentClass import Agent
   11  
   12: class QLearnerAgent(Agent):
   13      ''' Implementation for a Q Learning Agent '''
   14  

/Users/dabel/Research/simple_rl/build/lib/simple_rl/agents/func_approx/GradientBoostingAgentClass.py:
   22  
   23  # simple_rl classes.
   24: from ..QLearnerAgentClass import QLearnerAgent
   25  
   26: class GradientBoostingAgent(QLearnerAgent):
   27      '''
   28:     QLearnerAgent that uses gradient boosting with additive regression trees to approximate the Q Function.
   29      '''
   30  
   31      def __init__(self, actions, name="grad_boost", gamma=0.99, explore="softmax", markov_window=20, update_window=500):
   32          name += "-m" if markov_window > 0 else ""
   33:         QLearnerAgent.__init__(self, actions=actions, name=name, gamma=gamma, explore=explore)
   34          self.weak_learners = []
   35          self.model = []

/Users/dabel/Research/simple_rl/build/lib/simple_rl/agents/func_approx/LinearQLearnerAgentClass.py:
    1  '''
    2: LinearQLearnerAgentClass.py
    3  
    4  Contains implementation for a Q Learner with a Linear Function Approximator.
    .
   10  
   11  # Other imports.
   12: from simple_rl.agents import Agent, QLearnerAgent
   13  
   14: class LinearQLearnerAgent(QLearnerAgent):
   15      '''
   16:     QLearnerAgent with a linear function approximator for the Q Function.
   17      '''
   18  
   19      def __init__(self, actions, num_features, rand_init=True, name="Linear-Q", alpha=0.2, gamma=0.99, epsilon=0.2, explore="uniform", rbf=False, anneal=True):
   20          name = name + "-rbf" if rbf else name
   21:         QLearnerAgent.__init__(self, actions=list(actions), name=name, alpha=alpha, gamma=gamma, epsilon=epsilon, explore=explore, anneal=anneal)
   22          self.num_features = num_features
   23          # Add a basis feature.
   ..
  125      def reset(self):
  126          self.weights = np.zeros(self.num_features*len(self.actions))
  127:         QLearnerAgent.reset(self)
  128  
  129  

/Users/dabel/Research/simple_rl/build/lib/simple_rl/agents/func_approx/LinearSarsaAgentClass.py:
   10  
   11  # Other imports.
   12: from simple_rl.agents.func_approx.LinearQLearnerAgentClass import LinearQLearnerAgent
   13  
   14: class LinearSarsaAgent(LinearQLearnerAgent):
   15      '''
   16      Sarsa Agent with a linear function approximator for the Q Function.
   ..
   19      def __init__(self, actions, num_features, rand_init=False, name="Linear-SARSA", alpha=0.05, gamma=0.99, epsilon=0.01, explore="uniform", rbf=False, anneal=True):
   20          name = name + "-rbf" if (name == "sarsa-linear" and rbf) else name
   21:         LinearQLearnerAgent.__init__(self, actions=list(actions), rand_init=rand_init, num_features=num_features, name=name, alpha=alpha, gamma=gamma, epsilon=epsilon, explore=explore, anneal=anneal)
   22  
   23      def act(self, state, reward):

/Users/dabel/Research/simple_rl/build/scripts-2.7/run_experiments.py:
   29  from simple_rl.experiments import Experiment
   30  from simple_rl.mdp import MarkovGameMDP
   31: from simple_rl.agents import QLearnerAgent, FixedPolicyAgent
   32  
   33  
   ..
  456  
  457      # Setup agents.
  458:     from simple_rl.agents import RandomAgent, QLearnerAgent
  459      
  460      random_agent = RandomAgent(actions)
  461:     qlearner_agent = QLearnerAgent(actions, gamma=gamma, explore="uniform")
  462:     agents = [qlearner_agent, random_agent]
  463  
  464      # Run Agents.
  465      if isinstance(mdp, MarkovGameMDP):
  466          # Markov Game.
  467:         agents = {qlearner_agent.name: qlearner_agent, random_agent.name:random_agent}
  468          play_markov_game(agents, mdp, instances=100, episodes=1, steps=500)
  469      else:

/Users/dabel/Research/simple_rl/dist/simple_rl-0.77-py2.py3-none-any.whl:
    <binary>

/Users/dabel/Research/simple_rl/examples/abstraction_example.py:
    6  # Other imports.
    7  import examples.srl_example_setup
    8: from simple_rl.agents import QLearnerAgent, RandomAgent
    9  from simple_rl.tasks import GridWorldMDP
   10  from simple_rl.run_experiments import run_agents_on_mdp
   ..
   14      # Setup MDP, Agents.
   15      mdp = GridWorldMDP(width=10, height=10, init_loc=(1, 1), goal_locs=[(10, 10)])
   16:     ql_agent = QLearnerAgent(actions=mdp.get_actions())
   17      rand_agent = RandomAgent(actions=mdp.get_actions())
   18:     abstr_identity_agent = AbstractionWrapper(QLearnerAgent, agent_params={"epsilon":0.9}, actions=mdp.get_actions())
   19  
   20      # Run experiment and make plot.

/Users/dabel/Research/simple_rl/examples/bandit_example.py:
    6  # Other imports.
    7  import examples.srl_example_setup
    8: from simple_rl.agents import LinUCBAgent, QLearnerAgent, RandomAgent
    9  from simple_rl.tasks import BanditMDP
   10  from simple_rl.run_experiments import run_agents_on_mdp
   ..
   15  
   16      lin_agent = LinUCBAgent(actions=mdp.get_actions())
   17:     ql_agent = QLearnerAgent(actions=mdp.get_actions())
   18      rand_agent = RandomAgent(actions=mdp.get_actions())
   19  

/Users/dabel/Research/simple_rl/examples/blank_example.py:
    6  # Other imports.
    7  import examples.srl_example_setup
    8: from simple_rl.agents import QLearnerAgent, RandomAgent, DoubleQAgent
    9  from simple_rl.tasks import FourRoomMDP, ComboLockMDP
   10  from simple_rl.run_experiments import run_agents_on_mdp
   ..
   17  
   18      dq_agent = DoubleQAgent(actions=mdp.get_actions())
   19:     ql_agent = QLearnerAgent(actions=mdp.get_actions())
   20      rand_agent = RandomAgent(actions=mdp.get_actions())
   21  

/Users/dabel/Research/simple_rl/examples/examples_overview.ipynb:
   35      "sys.path.insert(0, parent_dir)\n",
   36      "\n",
   37:     "from simple_rl.agents import QLearnerAgent, RandomAgent\n",
   38      "from simple_rl.tasks import GridWorldMDP\n",
   39      "from simple_rl.run_experiments import run_agents_on_mdp"
   ..
   60      "\n",
   61      "# Setup Agents.\n",
   62:     "ql_agent = QLearnerAgent(actions=mdp.get_actions()) \n",
   63      "rand_agent = RandomAgent(actions=mdp.get_actions())"
   64     ]
   ..
   84        "\tgridworld_h-6_w-6\n",
   85        "(Agents)\n",
   86:       "\tqlearner\n",
   87        "\trandom\n",
   88        "(Params)\n",
   ..
   91        "\tsteps : 40\n",
   92        "\n",
   93:       "qlearner is learning.\n",
   94        "  Instance 1 of 5.\n",
   95        "  Instance 2 of 5.\n",
   ..
  107        "\n",
  108        "--- TIMES ---\n",
  109:       "qlearner agent took 0.89 seconds.\n",
  110        "random agent took 0.35 seconds.\n",
  111        "-------------\n",
  ...
  152        "(Agents)\n",
  153        "\trmax-h3\n",
  154:       "\tqlearner\n",
  155        "\trandom\n",
  156        "(Params)\n",
  ...
  166        "  Instance 5 of 5.\n",
  167        "\n",
  168:       "qlearner is learning.\n",
  169        "  Instance 1 of 5.\n",
  170        "  Instance 2 of 5.\n",
  ...
  184        "random agent took 0.27 seconds.\n",
  185        "rmax-h3 agent took 70.88 seconds.\n",
  186:       "qlearner agent took 0.75 seconds.\n",
  187        "-------------\n",
  188        "\n"
  ...
  380      "from simple_rl.tasks import TaxiOOMDP\n",
  381      "from simple_rl.run_experiments import run_agents_on_mdp\n",
  382:     "from simple_rl.agents import QLearnerAgent, RandomAgent\n",
  383      "\n",
  384      "# Taxi initial state attributes..\n",
  ...
  388      "\n",
  389      "# Make agents.\n",
  390:     "ql_agent = QLearnerAgent(actions=taxi_mdp.get_actions()) \n",
  391      "rand_agent = RandomAgent(actions=taxi_mdp.get_actions())"
  392     ]
  ...
  412        "\ttaxi_h-4_w-4\n",
  413        "(Agents)\n",
  414:       "\tqlearner\n",
  415        "\trandom\n",
  416        "(Params)\n",
  ...
  419        "\tsteps : 150\n",
  420        "\n",
  421:       "qlearner is learning.\n",
  422        "  Instance 1 of 5.\n",
  423        "  Instance 2 of 5.\n",
  ...
  436        "--- TIMES ---\n",
  437        "random agent took 7.68 seconds.\n",
  438:       "qlearner agent took 15.32 seconds.\n",
  439        "-------------\n",
  440        "\n"
  ...
  493        "(Agents)\n",
  494        "\tfixed-policy\n",
  495:       "\tqlearner\n",
  496        "(Params)\n",
  497        "\tinstances : 10\n",
  ...
  523     "source": [
  524      "from simple_rl.run_experiments import play_markov_game\n",
  525:     "from simple_rl.agents import QLearnerAgent, FixedPolicyAgent\n",
  526      "from simple_rl.tasks import RockPaperScissorsMDP\n",
  527      "\n",
  ...
  530      "# Setup MDP, Agents.\n",
  531      "markov_game = RockPaperScissorsMDP()\n",
  532:     "ql_agent = QLearnerAgent(actions=markov_game.get_actions(), epsilon=0.2) \n",
  533      "fixed_action = random.choice(markov_game.get_actions())\n",
  534      "fixed_agent = FixedPolicyAgent(policy=lambda s:fixed_action)\n",
  ...
  604     "source": [
  605      "from simple_rl.tasks import GymMDP\n",
  606:     "from simple_rl.agents import LinearQLearnerAgent, RandomAgent\n",
  607      "from simple_rl.run_experiments import run_agents_on_mdp\n",
  608      "\n",
  ...
  612      "\n",
  613      "# Setup agents and run.\n",
  614:     "lin_agent = LinearQLearnerAgent(gym_mdp.get_actions(), num_features=num_feats, alpha=0.2, epsilon=0.4, rbf=True)\n",
  615      "\n",
  616      "run_agents_on_mdp([lin_agent], gym_mdp, instances=3, episodes=1, steps=50)"

/Users/dabel/Research/simple_rl/examples/goal_based_options_example.py:
    7  import examples.srl_example_setup
    8  from simple_rl.utils import make_mdp
    9: from simple_rl.agents import QLearnerAgent, RandomAgent
   10  from simple_rl.run_experiments import run_agents_multi_task
   11  from simple_rl.abstraction import AbstractionWrapper, aa_helpers, ActionAbstraction
   ..
   14      # Setup MDP, Agents.
   15      mdp_distr = make_mdp.make_mdp_distr(mdp_class="four_room")
   16:     ql_agent = QLearnerAgent(actions=mdp_distr.get_actions())
   17      rand_agent = RandomAgent(actions=mdp_distr.get_actions())
   18  
   ..
   20      goal_based_options = aa_helpers.make_goal_based_options(mdp_distr)
   21      goal_based_aa = ActionAbstraction(prim_actions=mdp_distr.get_actions(), options=goal_based_options)
   22:     option_agent = AbstractionWrapper(QLearnerAgent, actions=mdp_distr.get_actions(), action_abstr=goal_based_aa)
   23  
   24      # Run experiment and make plot.

/Users/dabel/Research/simple_rl/examples/gym_example.py:
    7  # Other imports.
    8  import examples.srl_example_setup
    9: from simple_rl.agents import LinearQLearnerAgent, RandomAgent
   10  from simple_rl.tasks import GymMDP
   11  from simple_rl.run_experiments import run_agents_on_mdp
   ..
   17  
   18      # Setup agents and run.
   19:     lin_agent = LinearQLearnerAgent(gym_mdp.get_actions(), num_features=num_feats, rbf=True, alpha=0.1, epsilon=0.1, anneal=True)
   20      rand_agent = RandomAgent(gym_mdp.get_actions())
   21      run_agents_on_mdp([lin_agent, rand_agent], gym_mdp, instances=50, episodes=50, steps=100, open_plot=open_plot)

/Users/dabel/Research/simple_rl/examples/markov_game_example.py:
    7  # Other imports.
    8  import examples.srl_example_setup
    9: from simple_rl.agents import QLearnerAgent, FixedPolicyAgent
   10  from simple_rl.tasks import RockPaperScissorsMDP
   11  from simple_rl.run_experiments import play_markov_game 
   ..
   14      # Setup MDP, Agents.
   15      markov_game = RockPaperScissorsMDP()
   16:     ql_agent = QLearnerAgent(actions=markov_game.get_actions())
   17      fixed_action = random.choice(markov_game.get_actions())
   18      fixed_agent = FixedPolicyAgent(policy=lambda s: fixed_action)

/Users/dabel/Research/simple_rl/examples/multitask_example.py:
    8  from simple_rl.mdp import MDPDistribution
    9  from simple_rl.tasks import GridWorldMDP, RandomMDP, ChainMDP, TaxiOOMDP, FourRoomMDP
   10: from simple_rl.agents import QLearnerAgent, RandomAgent, RMaxAgent
   11  from simple_rl.run_experiments import run_agents_multi_task
   12  from simple_rl.utils import make_mdp
   ..
   15      # Make MDP distribution, agents.
   16      mdp_distr = make_mdp.make_mdp_distr(mdp_class="four_room")
   17:     ql_agent = QLearnerAgent(actions=mdp_distr.get_actions())
   18      rand_agent = RandomAgent(actions=mdp_distr.get_actions())
   19  

/Users/dabel/Research/simple_rl/examples/new_mdp_example.py:
    7  # Other imports.
    8  import examples.srl_example_setup
    9: from simple_rl.agents import QLearnerAgent, RandomAgent
   10  from simple_rl.tasks import GridWorldMDP, GridWorldState
   11  from simple_rl.run_experiments import run_agents_on_mdp 
   ..
   83      # Setup MDP, Agents.
   84      mdp = ColoredGridWorldMDP(state_colors)
   85:     ql_agent = QLearnerAgent(actions=mdp.get_actions())
   86      rand_agent = RandomAgent(actions=mdp.get_actions())
   87  

/Users/dabel/Research/simple_rl/examples/.ipynb_checkpoints/examples_overview-checkpoint.ipynb:
   84        "\tgridworld_h-6_w-6\n",
   85        "(Agents)\n",
   86:       "\tqlearner\n",
   87        "\trandom\n",
   88        "(Params)\n",
   ..
   91        "\tsteps : 40\n",
   92        "\n",
   93:       "qlearner is learning.\n",
   94        "  Instance 1 of 5.\n",
   95        "  Instance 2 of 5.\n",
   ..
  107        "\n",
  108        "--- TIMES ---\n",
  109:       "qlearner agent took 1.25 seconds.\n",
  110        "random agent took 0.38 seconds.\n",
  111        "-------------\n",
  ...
  152        "(Agents)\n",
  153        "\trmax-h3\n",
  154:       "\tqlearner\n",
  155        "\trandom\n",
  156        "(Params)\n",
  ...
  166        "  Instance 5 of 5.\n",
  167        "\n",
  168:       "qlearner is learning.\n",
  169        "  Instance 1 of 5.\n",
  170        "  Instance 2 of 5.\n",
  ...
  184        "random agent took 0.27 seconds.\n",
  185        "rmax-h3 agent took 70.88 seconds.\n",
  186:       "qlearner agent took 0.75 seconds.\n",
  187        "-------------\n",
  188        "\n"
  ...
  412        "\ttaxi_h-4_w-4\n",
  413        "(Agents)\n",
  414:       "\tqlearner\n",
  415        "\trandom\n",
  416        "(Params)\n",
  ...
  419        "\tsteps : 150\n",
  420        "\n",
  421:       "qlearner is learning.\n",
  422        "  Instance 1 of 5.\n",
  423        "  Instance 2 of 5.\n",
  ...
  436        "--- TIMES ---\n",
  437        "random agent took 7.68 seconds.\n",
  438:       "qlearner agent took 15.32 seconds.\n",
  439        "-------------\n",
  440        "\n"
  ...
  493        "(Agents)\n",
  494        "\tfixed-policy\n",
  495:       "\tqlearner\n",
  496        "(Params)\n",
  497        "\tinstances : 10\n",

/Users/dabel/Research/simple_rl/examples/results/multitask-fourroom_h-9_w-9/parameters.txt:
    2  	multitask-fourroom_h-9_w-9
    3  (Agents)
    4: 	qlearner,0
    5  	random,1
    6  (Params)

/Users/dabel/Research/simple_rl/results/chainmdp-4/parameters.txt:
    2  	chainmdp-4
    3  (Agents)
    4: 	qlearner
    5  	rmax-h4
    6  (Params)

/Users/dabel/Research/simple_rl/results/chainmdp-5/parameters.txt:
    2  	chainmdp-5
    3  (Agents)
    4: 	qlearner
    5  	rmax-h4
    6  (Params)

/Users/dabel/Research/simple_rl/results/gridworld_h-10_w-10/parameters.txt:
    2  	gridworld_h-10_w-10
    3  (Agents)
    4: 	qlearner
    5  	rmax-h4
    6  (Params)

/Users/dabel/Research/simple_rl/results/gridworld_h-5_w-5/parameters.txt:
    2  	gridworld_h-5_w-5
    3  (Agents)
    4: 	qlearner-softmax
    5  	random
    6  (Params)

/Users/dabel/Research/simple_rl/results/gridworld_h-6_w-6/parameters.txt:
    2  	gridworld_h-6_w-6
    3  (Agents)
    4: 	qlearner-softmax
    5  	random
    6  (Params)

/Users/dabel/Research/simple_rl/results/gridworld_h-8_w-8/parameters.txt:
    2  	gridworld_h-8_w-8
    3  (Agents)
    4: 	qlearner-softmax
    5  	random
    6  (Params)

/Users/dabel/Research/simple_rl/results/in_pap_taxi_h-10_w-10/parameters.txt:
    2  	taxi_h-10_w-10
    3  (Agents)
    4: 	qlearner-softmax
    5: 	qlearner-uniform
    6  	random
    7  (Params)

/Users/dabel/Research/simple_rl/results/pruning-taxi_h-10_w-10/parameters.txt:
    3  (Agents)
    4  	rmax-h2
    5: 	qlearner-softmax
    6: 	qlearner-uniform
    7  	random
    8  (Params)

/Users/dabel/Research/simple_rl/results/taxi_h-10_w-10/parameters.txt:
    2  	taxi_h-10_w-10
    3  (Agents)
    4: 	qlearner-uniform
    5: 	qlearner-softmax
    6  	random
    7  (Params)

/Users/dabel/Research/simple_rl/results/taxi_h-5_w-5/parameters.txt:
    2  	taxi_h-5_w-5
    3  (Agents)
    4: 	qlearner-uniform
    5  	random
    6  (Params)

/Users/dabel/Research/simple_rl/simple_rl/run_experiments.py:
  457      
  458      random_agent = RandomAgent(actions)
  459:     qlearner_agent = QLearningAgent(actions, gamma=gamma, explore="uniform")
  460:     agents = [qlearner_agent, random_agent]
  461  
  462      # Run Agents.
  463      if isinstance(mdp, MarkovGameMDP):
  464          # Markov Game.
  465:         agents = {qlearner_agent.name: qlearner_agent, random_agent.name:random_agent}
  466          play_markov_game(agents, mdp, instances=100, episodes=1, steps=500)
  467      else:

/Users/dabel/Research/simple_rl/simple_rl/agents/__init__.py:
    3  
    4  	AgentClass: Contains the basic skeleton of an RL Agent.
    5: 	QLearningAgentClass: QLearner.
    6  	LinearQLearningAgentClass: Q Learner with a Linear Approximator.
    7  	RandomAgentClass: Random actor.

/Users/dabel/Research/simple_rl/simple_rl/experiments/results/gridworld_h-10_w-10/parameters.txt:
    2  	gridworld_h-10_w-10
    3  (Agents)
    4: 	qlearner-uniform
    5: 	qlearner-softmax
    6  	random
    7  (Params)

/Users/dabel/Research/simple_rl/tests/results/gridworld_h-3_w-2/parameters.txt:
    2  	gridworld_h-3_w-2
    3  (Agents)
    4: 	qlearner,0
    5  	random,1
    6  (Params)

/Users/dabel/Research/simple_rl/tests/results/multitask-fourroom_h-7_w-7/parameters.txt:
    2  	multitask-fourroom_h-7_w-7
    3  (Agents)
    4: 	qlearner,0
    5  	random,1
    6  (Params)

133 matches across 40 files
